⭐ 📅 📃 👉 📖 🤦‍♂️ 🖖 🤓


1 Captain’s log

Star date 71646.61. Our mission is to use R statistical software to extract star dates from the scripts of Star Trek: The Next Generation and observe their progression over the course of the show’s seven seasons. There appears to be some mismatch in the frequency of digits after the decimal point – could this indicate poor abillity to choose random numbers? Or something more sinister? We shall venture deep into uncharted territory for answers…

2 Make it so

3 Energise

library(readr)  # read text files
library(purrr)  # iterate function over files
library(stringr)  # manipulate strings
library(dplyr)  # data manipulation and pipe opeartor (%>%)
library(ggplot2)  # plotting

4 Lieutenant Commander Data

scripts <- purrr::map(
  list.files(
    "data/scripts",
    full.names = TRUE
    ),
  read_lines # read the content
  )

5 Engage!

stardates <- stringr::str_extract_all(
  scripts,
  pattern = "date[:space:][[:digit:]\\.[:digit:]]{7}"
) %>% 
  tibble::enframe() %>% 
  tidyr::unnest() %>% 
  dplyr::transmute(
    episode = name,
    stardate = stringr::str_replace(
      string = value,
      pattern = "date ",
      replacement = ""
    )
  ) %>% 
  dplyr::mutate(
    season = as.character(
      case_when(
        episode %in% 1:25 ~ "1",
        episode %in% 26:47 ~ "2",
        episode %in% 48:73 ~ "3",
        episode %in% 74:99 ~ "4",
        episode %in% 100:125 ~ "5",
        episode %in% 126:151 ~ "6",
        episode %in% 152:176 ~ "7"
      )
    ),
    stardate = as.numeric(
      ifelse(
        test = stardate %in% c("41148..", "40052..", "37650.."),
        yes = "NA",
        no = stardate
      )
    )
  ) %>% 
  dplyr::filter(!is.na(stardate))

glimpse(stardates)
## Observations: 263
## Variables: 3
## $ episode  <int> 1, 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 5, 5, 6, 6, 7,...
## $ stardate <dbl> 42353.7, 42354.1, 42354.2, 42354.7, 42372.5, 41209.2,...
## $ season   <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1"...

6 On screen!

library(ggsci)
library(ggthemes)

dotplot_stardate <- stardates %>% 
  ggplot2::ggplot() +
  geom_point(aes(x = episode, y = stardate, color = season)) +
  labs(main = "Episodes are pretty much chronological") +
  theme_solarized_2(light = FALSE) + 
  scale_color_startrek()

plotly::ggplotly(dotplot_stardate, tooltip = c("stardate", "episode", "season"))

7 Decimals

Extract them.

stardates <- stardates %>% 
  mutate(
    stardate_decimal = as.numeric(
      str_sub(
        as.character(stardate),
        7,
        7
      )
    ),
    stardate_decimal = ifelse(
      is.na(stardate_decimal),
      0,
      stardate_decimal
    )
  ) %>% 
  select(season, episode, stardate, stardate_decimal)

Datatable of them.

library(DT)

stardates %>% 
  mutate(season = as.factor(season)) %>% 
  DT::datatable(
    filter = "top",
    extensions = 'Buttons',
      options = list(
        autoWidth = TRUE,  # column width consistent when making selections
        dom = "Blfrtip",
        buttons = 
          list("copy", list(
            extend = "collection",
            buttons = c("csv", "excel", "pdf"),
            text = "Download"
          ) 
          ),
        # customize the length menu
        lengthMenu = list(
          c(10, 25, 50, -1), # declare values
          c(10, 25, 50, "All") # declare titles
        ), # end of lengthMenu customization
        pageLength = 10
      )
    )

Do a baarplot.

stardates %>% 
  ggplot2::ggplot() +
  geom_bar(aes(as.character(stardate_decimal)), fill = "#CC0C00FF") +
  labs(
    main = "Stardates end",
    x = "stardate decimal value"
  ) +
  theme_dark() +
  theme_solarized_2(light = FALSE)

8 Belay that

stardates %>% 
  ggplot2::ggplot() +
  geom_bar(
    aes(as.character(stardate_decimal)),
    fill= c(
      rep("#CC0C00FF", 10),
      rep("#5C88DAFF", 9),
      rep("#84BD00FF", 10),
      rep("#FFCD00FF", 9),
      rep("#7C878EFF", 10),
      rep("#00B5E2FF", 8),
      rep("#00AF66FF", 8)
    )
  ) +
  labs(x = "stardate decimal value") +
  facet_wrap(~ season) +
  theme_solarized_2(light = FALSE) + 
  scale_color_startrek()

10 R information

sessionInfo()
## R version 3.4.3 (2017-11-30)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.3
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] DT_0.2             ggthemes_3.4.0     ggsci_2.8         
##  [4] bindrcpp_0.2       ggplot2_2.2.1.9000 dplyr_0.7.4       
##  [7] stringr_1.2.0      purrr_0.2.4        readr_1.1.1       
## [10] emo_0.0.0.9000    
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.15        compiler_3.4.3      plyr_1.8.4         
##  [4] bindr_0.1           tools_3.4.3         digest_0.6.13      
##  [7] viridisLite_0.2.0   jsonlite_1.5        lubridate_1.7.2    
## [10] evaluate_0.10.1     tibble_1.3.4        gtable_0.2.0       
## [13] pkgconfig_2.0.1     rlang_0.1.6         shiny_1.0.5        
## [16] crosstalk_1.0.0     yaml_2.1.16         httr_1.3.1         
## [19] knitr_1.18          htmlwidgets_0.9     hms_0.3            
## [22] rprojroot_1.2       grid_3.4.3          glue_1.2.0         
## [25] data.table_1.10.4-2 R6_2.2.2            plotly_4.7.1       
## [28] rmarkdown_1.6       tidyr_0.7.2         magrittr_1.5       
## [31] backports_1.1.1     scales_0.5.0.9000   htmltools_0.3.6    
## [34] assertthat_0.2.0    xtable_1.8-2        mime_0.5           
## [37] colorspace_1.3-2    httpuv_1.3.5        labeling_0.3       
## [40] stringi_1.1.6       lazyeval_0.2.1      munsell_0.4.3      
## [43] crayon_1.3.4

  1. The star date for today’s date (7 March 2018) as calculated using the trekguide.com method